Analysis of frog monitoring data from 6 years at Barmah-Millewa forest.
This document is intended as supplementary material to the analysis of acoustic frog data between 2018 and 2024 (6 survey years). This document provides relevant R code to process data from the acoustic classifier, extract relevent environmental variables, run a multi-season multi-species occupancy model and interrogate the results.
Load relevant R packages and state settings for modeling.
Source a range of handy functions for summarily and plotting outputs from a STAN model. These were used in a different project and are stored on github.
# Source useful functions from the deer project github page
req <- GET("https://api.github.com/repos/JustinCally/statewide-deer-analysis/git/trees/main?recursive=1")
stop_for_status(req)
filelist <- unlist(lapply(content(req)$tree, "[", "path"), use.names = F)
function_files <- paste0("https://raw.githubusercontent.com/JustinCally/statewide-deer-analysis/main/",
grep("functions/", filelist, value = TRUE, fixed = TRUE))
sapply(function_files, source, verbose = F)
source("functions/ppc_plots.R")
Below we load in site metadata for the analysis, which includes the location and deployment dates for the audiomoths.
site_metadata <- read_excel("data/TLM_Barmah-Millewa_AM_deployment_metadata_all_years.xlsx")
# site_data_habitat <- read_excel("data/site_data/TLM_habitat_transect_data_all_years.xlsx") %>%
# group_by(Site, Date) %>%
# summarise(across(c("Depth", "Emergent macrophyte", "Submerged macrophyte",
# "Floating macrophyte", "CWD", "Bare ground"), mean)) %>%
# mutate(Year = as.integer(as.factor(year(Date))))
#
# site_data_habitat2 <- read_excel("data/site_data/TLM_water_quality_all_years.xlsx") %>%
# group_by(Site, Date) %>%
# summarise(across(c("Waterbody width (m)", "Waterbody length (m)", "Max depth (cm)",
# "Canopy cover (%)", "Conductivity", "pH", "Water temp (°C)",
# "Oxygen (mg/L)", "Oxygen sat (%)", "mmHg", "Turbidity (FNU)",
# "Turbidity (NTU)"), mean)) %>%
# mutate(Year = as.integer(as.factor(year(Date))))
# Read in water management area data
WMAs <- read_excel("data/WMAs/Sites_WMAs.xlsx")
WMA_scores <- read_excel("data/WMAs/WMA_flood_scores.xlsx")
WMA_joined <- left_join(WMAs, WMA_scores) %>%
select(WMA, `2018-19` = `2018`, `2019-20` = `2019`, `2020-21` = `2020`, `2021-22` = `2021`, `2022-23` = `2022`, `2023-24` = `2023`) %>%
pivot_longer(2:7, names_to = "Monitoring year", values_to = "FloodScore") %>%
distinct()
# Convert site metadata to a spatially explicit dataset and format
site_metadata_sf <- site_metadata %>%
left_join(WMA_joined, by = join_by(WMA, `Monitoring year`)) %>%
st_as_sf(coords = c("X", "Y"), crs = 28355) %>%
mutate(`Site name` = case_when(`Site` == "Warri" ~ "Warri Yards",
`Site` == "Hughes Crossing" ~ "Hughs Crossing",
Site == "Little Rushy Swamp" ~ "Little Rushy",
Site == "Reed Beds Swamp" ~ "Reed Bed Swamp",
Site == "Wathours Lagoon" ~ "Wathours",
TRUE ~ `Site`),
Site = paste(`Site name`, AM, sep = "_AM"),
Site_Year = paste(Site, `Monitoring year`, sep = "_"),
Year = factor(`Monitoring year`) %>% as.integer(),
Cluster = factor(`Site name`) %>% as.integer(),
Loc = factor(Site) %>% as.integer(),
No_survey_nights = as.numeric(No_PAM_nts)) %>%
# arrange(`Site`) %>%
filter(!is.na(No_survey_nights) & No_survey_nights > 0) %>%
arrange(Site_Year) %>%
filter(!(Site_Year %in% c("Rookery_AM1_2022-23", "Two Mile Bunyip_AM2_2021-22")))
unique_sites <- site_metadata_sf %>%
group_by(Site) %>%
slice(1) %>%
select(Site)
# get alist of the nights deployed
nights_dep <- list()
for(i in 1:nrow(site_metadata_sf)) {
nights_dep[[i]] <- data.frame(Site = site_metadata_sf$Site[i],
Site_Year = site_metadata_sf$Site_Year[i],
Date = seq.Date(from = as.Date(site_metadata_sf$`Date start`[i]),
by = "1 day",
length.out = site_metadata_sf$No_survey_nights[i])) #site_metadata_sf$No_survey_nights[i]))
}
nights_dep_joined <- bind_rows(nights_dep)
site_hull <- st_convex_hull(unique_sites %>% st_combine()) %>%
st_transform(3577) %>%
st_buffer(100)
hydro_data <- vicmap_query("open-data-platform:hy_water_area_polygon") %>%
filter(BBOX(site_hull)) %>%
collect()
elev_data <- vicmap_query("open-data-platform:el_contour") %>%
filter(BBOX(site_hull)) %>%
collect()
mapview(hydro_data, zcol = "feature_type_code") + mapview(site_metadata_sf)
sf::st_write(site_hull, "data/barmah_hull/barmah_hull.shp")
sf::st_write(site_hull %>% st_transform(4326), "data/barmah_hull/barmah_hull.geojson")
sf::st_write(unique_sites %>% st_transform(3577) %>% st_buffer(100), "data/site_metadata_sf/site_metadata_sf.shp", overwrite = T)
sf::st_write(unique_sites %>% st_buffer(10, endCapStyle = "SQUARE") %>% st_transform(4326) %>% st_combine(), "data/barmah_hull/site_locs.geojson", overwrite = T)
Read in the hydrological data. This data has been obtained through Digital Earth Austraia sandbox (https://app.sandbox.dea.ga.gov.au/), the code used to extract the hydrologial data and the csvs for each site are available on GitHub: https://github.com/JustinCally/dea-barmah
# load in csv files of hydro conditions
# list.files("https://github.com/JustinCally/dea-barmah/tree/main/output")
# req <- GET("https://api.github.com/repos/JustinCally/dea-barmah/git/trees/main?recursive=1")
# stop_for_status(req)
# filelist <- unlist(lapply(content(req)$tree, "[", "path"), use.names = F)
# csv_files <- paste0("https://raw.githubusercontent.com/JustinCally/dea-barmah/main/",
# grep("output/", filelist, value = TRUE, fixed = TRUE))
# csv_files <- grep(".ipynb_checkpoints", x = csv)
#
# sapply(function_files, source, verbose = F)
#paste
csv_files <- paste0("https://raw.githubusercontent.com/JustinCally/dea-barmah/main/output/", unique(site_metadata_sf$Site), ".csv")
csv_files <- stringr::str_replace_all(csv_files, pattern = " ", replacement = "%20")
hydryo_data <- list()
for(i in 1:length(csv_files)) {
hydryo_data[[i]] <- readr::read_csv(csv_files[i], show_col_types = F) %>%
mutate(Site = unique(site_metadata_sf$Site)[i])
}
hydro_data_combined <- bind_rows(hydryo_data)
Given that the data was often only captured every 2 weeks we want to obtain daily estimates of hydrological conditions. To this we interpolate the hydrological data across the time range of the study (2018 - early 2024). We do this by fitting a polynomial regression with the loess() function.
#hydro interpolation
new_df_raw <- data.frame(date = seq(from = min(hydro_data_combined$date),
to = max(hydro_data_combined$date),
by="day"))
interpolate_data <- list()
for(i in 1:length(csv_files)) {
hd <- hydryo_data[[i]]
mod_pv <- loess(pv ~ as.numeric(date), data = hd, span = 0.075)
mod_npv <- loess(npv ~ as.numeric(date), data = hd, span = 0.075)
mod_bs <- loess(bs ~ as.numeric(date), data = hd, span = 0.075)
mod_wet <- loess(wet ~ as.numeric(date), data = hd, span = 0.075)
mod_water <- loess(water ~ as.numeric(date), data = hd, span = 0.075)
mod_veg_areas <- loess(veg_areas ~ as.numeric(date), data = hd, span = 0.075)
interpolate_data[[i]] <- new_df_raw %>%
mutate(Site = unique(site_metadata_sf$Site)[i],
pv = predict(mod_pv, newdata = new_df_raw),
npv = predict(mod_npv, newdata = new_df_raw),
bs = predict(mod_bs, newdata = new_df_raw),
wet = predict(mod_wet, newdata = new_df_raw),
water = predict(mod_water, newdata = new_df_raw),
veg_areas = predict(mod_veg_areas, newdata = new_df_raw)) %>%
mutate(across(.cols = c(pv,npv,bs, wet, water, veg_areas),
.fns = ~ case_when(.x > 1 ~ 1,
.x < 0 ~ 0,
TRUE ~ .x)))
}
interpolate_combined <- bind_rows(interpolate_data) %>%
mutate(Date = as.Date(date)) %>%
select(-date)
interpolate_data[[62]] %>%
ggplot() +
geom_point(aes(x = date, y = pv)) +
ylim(c(0,1))
Using the National Oceanic and Atmospheric Administration (NOAA) data of gridded daily precipitation and temperature, we extract the daily precipitation and minimum temperature values estimated at each site at a broad spatial scale (0.5 x 0.5 degrees).
# load in daily precip
# https://psl.noaa.gov/data/gridded/data.cpc.globalprecip.html
precipitation_daily <- c(terra::rast("data/climate/precip_2018.nc"),
terra::rast("data/climate/precip_2019.nc"),
terra::rast("data/climate/precip_2020.nc"),
terra::rast("data/climate/precip_2021.nc"),
terra::rast("data/climate/precip_2022.nc"),
terra::rast("data/climate/precip_2023.nc"),
terra::rast("data/climate/precip_2024.nc"))
precipitation_daily_df <- bind_cols(site_metadata_sf$Site_Year,
extract(precipitation_daily, vect(site_metadata_sf)))
colnames(precipitation_daily_df) <- c("Site_Year","ID", as.character(seq.Date(from = as.Date("2018-01-01"),
to = as.Date("2024-05-13"), by = "day")))
precipitation_daily_long <- pivot_longer(precipitation_daily_df,
cols = 3:2327, names_to = "Date", values_to = "Precipitation") %>%
select(-ID) %>%
mutate(Date = as.Date(Date)) %>%
right_join(nights_dep_joined, by = join_by(Site_Year, Date))
# load in daily precip
# https://psl.noaa.gov/data/gridded/data.cpc.globalprecip.html
tmin_daily <- c(terra::rast("data/climate/precip_2018.nc"),
terra::rast("data/climate/tmin_2019.nc"),
terra::rast("data/climate/tmin_2020.nc"),
terra::rast("data/climate/tmin_2021.nc"),
terra::rast("data/climate/tmin_2022.nc"),
terra::rast("data/climate/tmin_2023.nc"),
terra::rast("data/climate/tmin_2024.nc"))
tmin_daily_df <- bind_cols(site_metadata_sf$Site_Year,
extract(tmin_daily, vect(site_metadata_sf)))
colnames(tmin_daily_df) <- c("Site_Year","ID", as.character(seq.Date(from = as.Date("2018-01-01"),
to = as.Date("2024-05-13"), by = "day")))
tmin_daily_long <- pivot_longer(tmin_daily_df,
cols = 3:2327,
names_to = "Date",
values_to = "tmin") %>%
select(-ID) %>%
mutate(Date = as.Date(Date)) %>%
right_join(nights_dep_joined, by = join_by(Site_Year, Date))
Given some of the preceding code can take a while to run we save the output as an rds. This output is the daily values at each site for hydrological and climate conditions as well as the days since the start of spring (1st of September).
daily_vars <- left_join(precipitation_daily_long,
tmin_daily_long) %>%
mutate(Day = lubridate::yday(Date)-243,
Day = case_when(Day < 1 ~ Day + 243 + (365-243),
TRUE ~ Day),
cosDay = 2*pi*Day/365,
sinDay = 2*pi*Day/365) %>%
inner_join(interpolate_combined, by = c("Site", "Date")) %>%
distinct()
saveRDS(daily_vars, "data/intermediate/daily_vars.rds")
# read in daily vars
daily_vars <- readRDS("data/intermediate/daily_vars.rds")
site_vars <- daily_vars %>%
group_by(Site_Year) %>%
summarise(across(c("pv", "npv", "bs", "wet", "water", "veg_areas"), mean)) %>%
ungroup() %>%
mutate(watercomb = wet + water) %>%
left_join(site_metadata_sf %>%
st_drop_geometry() %>%
transmute(Site_Year, WMA, FloodScore = factor(FloodScore)),
by = join_by(Site_Year))
For this analysis we model occupancy and call rate using daily maximum call probability scores. We obtained these scores from the raw model output and processed them in a separate script available in this repository: frog_daily_subset.R. This data comes in two chunks (2018-2021 and 2021-2024).
# Subset to site years
site_year <- nights_dep_joined %>% select(Site, Site_Year, Date) %>%
unique() %>%
left_join(site_metadata_sf %>% st_drop_geometry() %>% select(Site_Year, Year, Cluster, Loc), by = join_by(Site_Year))
# List of species we have data for
species_list <- c(`Barking Marsh Frog` = 13059,
`Eastern Sign-bearing Froglet` = 13131,
`Common Froglet` = 13134,
# `Sloane's Froglet` = 13135,
`Common Spadefoot Toad` = 13086,
`Peron's Tree Frog` = 13204,
`Pobblebonk Frog` = 63913,
`Spotted Marsh Frog (race unknown)` = 13063)
nspecies <- length(species_list)
sp_l_col <- paste0("sp_", species_list)
# species_cols <- str_extract_all(last(colnames(site_records)), "[-+]?[0-9]+")[[1]]
site_records_all <- list()
daily_files <- list.files("data/daily_data/OtherYears/", full.names = T)
files_to_read <- list.files("data/daily_data/OtherYears/")
sp_read_order <- stringr::str_remove_all(files_to_read, c("daily_max_|.csv"))
for(i in 1:length(daily_files)) {
site_records_all[[i]] <- read_csv(daily_files[i], show_col_types = F) %>%
select(-1) %>%
mutate(DateTime = as.POSIXct(DateTime, tz = Sys.timezone()),
Date = coalesce(Date, as.Date(DateTime - hours(12)))) %>%
mutate(`Site` = case_when(Site == "Little Rushy Swamp_AM1" ~ "Little Rushy_AM1",
Site == "Little Rushy Swamp_AM2" ~ "Little Rushy_AM2",
TRUE ~ `Site`)) %>%
mutate(Validated_Grp = as.character(species_list[[sp_read_order[i]]]),
Validation_Species = sp_read_order[i],
Validated_Grp_score = !!sym(sp_read_order[i])) %>%
select(FileId, Filename, Site, Date, DateTime, Orig_Start, Orig_End,
Validated_Grp, Validation_Species, Validated_Grp_score) %>%
left_join(site_year) %>%
filter(!is.na(Year))
}
names(site_records_all) <- sp_read_order
#### 2021-24 ####
site_records_2023 <- list()
daily_files <- list.files("data/daily_data/2021-2024/", full.names = T)
files_to_read <- list.files("data/daily_data/2021-2024/")
sp_read_order <- stringr::str_remove_all(files_to_read, c("daily_max_|.csv"))
for(i in 1:length(daily_files)) {
site_records_2023[[i]] <- read_csv(daily_files[i], show_col_types = F) %>%
select(-1) %>%
mutate(Validated_Grp = as.character(species_list[[sp_read_order[i]]]),
Validation_Species = sp_read_order[i],
Validated_Grp_score = !!sym(sp_read_order[i])) %>%
select(FileId, Filename, Site, Date, DateTime, Orig_Start, Orig_End,
Validated_Grp, Validation_Species, Validated_Grp_score) %>%
left_join(site_year) %>%
filter(!is.na(Year))
}
names(site_records_2023) <- sp_read_order
site_records_combined <- list()
for(x in names(species_list)) {
site_records_combined[[x]] <- bind_rows(site_records_all[[x]], site_records_2023[[x]]) %>%
mutate(SnippetID = paste(FileId, Site, Date, Orig_Start, Orig_End, sep = "_"))
}
Up to 500 daily calls for each species were validated, with validations for each species covering ranges of probability scores from 0 to 1. We read in these validations and visualise the distribution for ‘true positives’ and ‘false positives’.
# site_validation <- read_excel("data/TLM_Barmah-Millewa_2022-23_validations_ALL.xlsx") %>%
# mutate(Site = case_when(Site == "Warri" ~ "Warri Yards",
# TRUE ~ Site),
# Site = paste(Site, AM, sep = "_AM"))
files_to_read <- list.files("data/validated_calls/OtherYears")
sp_read_order <- stringr::str_remove(files_to_read, "_validation.csv")
validated_calls <- list()
for(i in 1:nspecies) {
validated_calls[[sp_read_order[i]]] <- readr::read_csv(paste0("data/validated_calls/OtherYears/", files_to_read[i]),
show_col_types = F) %>%
mutate(Validated_Grp = as.character(species_list[[sp_read_order[i]]]),
Validation_Species = sp_read_order[i],
Validated_Grp_score = !!sym(sp_read_order[i]),
Date = as.Date(Date, "%d/%m/%Y")) %>%
mutate(Detected = as.integer(stringr::str_detect(Validation_TaxonIDs, Validated_Grp)),
SnippetID = paste(FileId, Site, Date, Orig_Start = Orig_Start_Time, Orig_End = Orig_End_Time, sep = "_")) %>%
select(Validated_Grp, Validation_Species, Validated_Grp_score, SnippetID, Detected)
}
files_to_read_2 <- list.files("data/validated_calls/2021-2024")
sp_read_order_2 <- stringr::str_remove(files_to_read_2, "_validation.csv")
validated_calls_2 <- list()
for(i in 1:nspecies) {
validated_calls_2[[sp_read_order_2[i]]] <- validated_calls[[sp_read_order_2[i]]] %>% bind_rows(readr::read_csv(paste0("data/validated_calls/2021-2024/", files_to_read_2[i]),
show_col_types = F) %>%
mutate(Validated_Grp = as.character(species_list[[sp_read_order_2[i]]]),
Validation_Species = sp_read_order_2[i],
Validated_Grp_score = !!sym(sp_read_order_2[i]),
Date = as.Date(Date, "%d/%m/%Y")) %>%
mutate(Detected = as.integer(stringr::str_detect(Validation_TaxonIDs, Validated_Grp)),
SnippetID = paste(FileId, Site, Date, Orig_Start = Orig_Start_Time, Orig_End = Orig_End_Time, sep = "_")) %>%
select(Validated_Grp, Validation_Species, Validated_Grp_score, SnippetID, Detected))
}
records_validated <- list()
for(x in names(species_list)) {
records_validated[[x]] <- left_join(site_records_combined[[x]], validated_calls_2[[x]]) %>%
mutate(Detected = coalesce(Detected, 2L)) %>%
arrange(Site_Year)
}
validated_calls_combined <- bind_rows(records_validated) %>%
filter(Detected != 2) %>%
rowwise() %>%
mutate(Prob_f = case_when(Validated_Grp_score == 1 ~ 0.999,
Validated_Grp_score == 0 ~ 0.001,
TRUE ~ Validated_Grp_score),
Prob_One_M = qlogis(Prob_f))
validated_calls_combined %>%
mutate(Detected = as.factor(Detected)) %>%
filter(Validation_Species != "Common Spadefoot Toad") %>%
ggplot() +
geom_density(aes(x = Prob_f, fill = Detected, colour = Detected), position = "jitter", alpha = 0.25) +
facet_wrap(~Validation_Species) +
xlab("Classifier-assigned probability") +
delwp_theme()
Figure 1: Distributions of probability scores assigned by the classifier
For the model, we filter out common spadefoot toad as we have no validated detections for this species. Below we format the acoustic, site, and nightly data into a format that can be used in the custom STAN model.
# Generate STAN data
sp_out <- c("Common Spadefoot Toad")
# "Pobblebonk Frog",
# "Spotted Marsh Frog (race unknown)")
species_list_cleaned <- species_list[which(!(names(species_list) %in% sp_out))]
records_validated <- records_validated[which(!(names(records_validated) %in% sp_out))]
nspecies_cleaned <- length(species_list_cleaned)
sp_l_col_cleaned <- paste0("sp_", species_list_cleaned)
# joined data
joined_data <- list()
site_data <- list()
# indexes for species
site_names <- unique(records_validated[[1]]$Site_Year)
nsites <- length(unique(records_validated[[1]]$Site_Year))
empty_array <- array(dim = c(nsites, length(species_list_cleaned)))
start_idx_0 <- empty_array
end_idx_0 <- empty_array
start_idx_1 <- empty_array
end_idx_1 <- empty_array
start_idx_2 <- empty_array
end_idx_2 <- empty_array
any_seen <- empty_array
site_idx_start <- empty_array
site_idx_end <- empty_array
call_rate_matrix <- list()
occ_matrix <- list()
joined_data <- readRDS("data/intermediate/joined_data.rds")
for(i in 1:length(species_list_cleaned)) {
#### Because we are randomly sampling ####
# joined_data[[i]] <- records_validated[[i]] %>%
# inner_join(daily_vars %>% na.omit()) %>%
# mutate(Prob_One = Validated_Grp_score,
# Site_f = as.integer(factor(Site_Year)),
# Prob_f = case_when(Prob_One == 1 ~ 0.999,
# Prob_One == 0 ~ 0.001,
# TRUE ~ Prob_One),
# Prob_One_M = qlogis(Prob_f),
# weight = case_when(Detected == 2 ~ 0.01,
# TRUE ~ 1)) %>%
# group_by(Site_f) %>%
# mutate(any_seen = case_when(any(Detected == 1) ~ 1,
# TRUE ~ 0)) %>%
# slice_sample(n = 10, weight_by = weight) %>%
# ungroup() %>%
# arrange(Site_f, Detected)
site_data[[i]] <- joined_data[[i]] %>%
mutate(Loc = as.factor(Site) %>% as.integer(),
Cluster = as.factor(str_remove_all(Site, "_AM1|_AM2")) %>% as.integer()) %>%
select(Site_f, Loc, Cluster, Year, Site_Year, any_seen, Site) %>%
distinct() %>%
arrange(Site_f) %>%
left_join(site_vars, by = "Site_Year")
# occupancy model
psi_form <- ~ scale(sqrt(wet)) + scale(pv) + scale(sqrt(water)) # + FloodScore
occ_matrix[[i]] <- model.matrix(psi_form, data = site_data[[i]])
# Daily precipitation matrix
theta_form <- ~ scale(sqrt(Precipitation)) + scale(tmin) + scale(sqrt(wet)) + scale(sqrt(water)) + scale(pv) #+ cosDay + sinDay
call_rate_matrix[[i]] <- model.matrix(theta_form, data = joined_data[[i]])
for(j in 1:nsites) {
start_idx_0[j,i] <- min(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 0))
end_idx_0[j,i] <- max(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 0))
start_idx_1[j,i] <- min(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 1))
end_idx_1[j,i] <- max(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 1))
start_idx_2[j,i] <- min(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 2))
end_idx_2[j,i] <- max(which(joined_data[[i]]$Site_f == j & joined_data[[i]]$Detected == 2))
site_idx_start[j,i] <- min(which(joined_data[[i]]$Site_f == j))
site_idx_end[j,i] <- max(which(joined_data[[i]]$Site_f == j))
}
start_idx_0[is.infinite(start_idx_0[,i]),i] <- 0
start_idx_1[is.infinite(start_idx_1[,i]),i] <- 0
start_idx_2[is.infinite(start_idx_2[,i]),i] <- 0
end_idx_0[is.infinite(end_idx_0[,i]),i] <- 0
end_idx_1[is.infinite(end_idx_1[,i]),i] <- 0
end_idx_2[is.infinite(end_idx_2[,i]),i] <- 0
any_seen[,i] <- as.integer(start_idx_1[,i] != 0)
}
# saveRDS(joined_data, "data/intermediate/joined_data.rds")
In addition to the validation data, we also have previous validations at a site-level that notes whether any calls were detected at that site in that year. These are included in the model as if restricts the probability space we need to marginalize over for sites that have at least one detection.
# read in updated any seen validations
any_seen_add <- readr::read_csv("data/any_seen/sp_detect_per_site_per_year.csv") %>%
mutate(`Site_AM` = case_when(Site_AM == "Little Rushy Swamp_AM1" ~ "Little Rushy_AM1",
Site_AM == "Little Rushy Swamp_AM2" ~ "Little Rushy_AM2",
Site_AM == "Hughes Crossing_AM1" ~ "Hughs Crossing_AM1",
Site_AM == "Hughes Crossing_AM2" ~ "Hughs Crossing_AM2",
Site_AM == "Reed Beds Swamp_AM1" ~ "Reed Bed Swamp_AM1",
Site_AM == "Reed Beds Swamp_AM2" ~ "Reed Bed Swamp_AM2",
Site_AM == "Wathours Lagoon_AM1" ~ "Wathours_AM1",
Site_AM == "Wathours Lagoon_AM2" ~ "Wathours_AM2",
TRUE ~ `Site_AM`)) %>%
mutate(Site_Year = paste(Site_AM, Year, sep = "_")) %>%
arrange(Site_Year)
setdiff(site_names, any_seen_add$Site_Year) %>% paste(collapse = "\n") %>% cat()
any_seen_clean <- any_seen_add %>% filter(Site_Year %in% site_names) %>%
select(`Barking Marsh Frog`,
`Eastern Sign-bearing Froglet` = `Eastern Sign-Bearing Froglet`,
`Common Froglet`,
`Peron's Tree Frog`,
`Pobblebonk Frog` = Pobblebonk,
`Spotted Marsh Frog (race unknown)` = `Spotted Marsh Frog`) %>%
as.matrix()
any_seen_joined <- any_seen_clean
for(j in 1:ncol(any_seen_clean)) {
for(i in 1:nrow(any_seen_clean))
any_seen_joined[i,j] <- any_seen[i,j] #max(any_seen[i,j], any_seen_clean[i,j])
}
We then combine all the data into a list, which will be used by the STAN model
nfiles <- nrow(joined_data[[1]])
scores <- lapply(joined_data, function(x) pull(x, Prob_One_M))
site_list <- lapply(joined_data, function(x) pull(x, Site_f))
loc_code <- lapply(site_data, function(x) pull(x, Loc))
cluster_code <- lapply(site_data, function(x) pull(x, Cluster))
year_code <- lapply(site_data, function(x) pull(x, Year))
detected <- lapply(joined_data, function(x) pull(x, Detected))
site_code <- lapply(joined_data, function(x) pull(x, Site_f))
nyear <- length(unique(unlist(year_code)))
nloc <- length(unique(unlist(loc_code)))
nclusters <- length(unique(unlist(cluster_code)))
stan_data <- list(nfiles = nfiles,
nsites = nsites,
nspec = nspecies_cleaned,
nyear = nyear,
nloc = nloc,
nclusters = nclusters,
score = array(unlist(scores),dim=c(nfiles,nspecies_cleaned)),
site_code = array(unlist(site_code),dim=c(nfiles,nspecies_cleaned)),
loc_code = array(unlist(loc_code),dim=c(nsites,nspecies_cleaned)),
cluster_code = array(unlist(cluster_code),dim=c(nsites,nspecies_cleaned)),
year_code = array(unlist(year_code),dim=c(nsites,nspecies_cleaned)),
year_counts = unname(apply(array(unlist(year_code),dim=c(nsites,nspecies_cleaned)),2,table)),
year_counts_file = unname(as.integer(table(joined_data[[1]]$Year))),
site_idx_start = site_idx_start,
site_idx_end = site_idx_end,
start_idx_0 = start_idx_0,
end_idx_0 = end_idx_0,
start_idx_1 = start_idx_1,
end_idx_1 = end_idx_1,
start_idx_2 = start_idx_2,
end_idx_2 = end_idx_2,
any_seen = any_seen_joined,
theta_mm = call_rate_matrix,
theta_nc = ncol(call_rate_matrix[[1]]),
psi_mm = occ_matrix,
psi_nc = ncol(occ_matrix[[1]]),
detected = array(unlist(detected),dim=c(nfiles,nspecies_cleaned)))
# stan_data$year_code[stan_data$year_code == 5] <- 4
stan_data_beta <- stan_data
stan_data_beta$score <- array(unlist(lapply(joined_data, function(x) pull(x, Prob_f))),dim=c(nfiles,nspecies_cleaned))
saveRDS(stan_data, "data/stan_data.rds")
Below is the STAN model used in our analysis. It is a mult-species, multi-season model.
data {
int<lower=0> nfiles; // number of files to process (observation periods)
int<lower=0> nsites; // number of site-year combos
int<lower=0> nspec; // number of species
int<lower=0> nyear; // number of years
int<lower=0> nclusters; // number of clusters (pair of recorders)
int<lower=0> nloc; // number of site locations (independent of years)
array[nfiles, nspec] real score; // occupancy detection score
array[nfiles, nspec] int detected; // whether or not was deteted
array[nfiles, nspec] int site_code; // file to site code
array[nsites, nspec] int loc_code;
array[nsites, nspec] int cluster_code;
array[nsites, nspec] int year_code;
array[nyear, nspec] real year_counts;
real year_counts_file[nyear];
array[nsites, nspec] int site_idx_start;
array[nsites, nspec] int site_idx_end;
int start_idx_0[nsites, nspec];
int end_idx_0[nsites, nspec];
int start_idx_1[nsites, nspec];
int end_idx_1[nsites, nspec];
int start_idx_2[nsites, nspec];
int end_idx_2[nsites, nspec];
int any_seen[nsites, nspec];
int<lower=0> theta_nc;
array[nspec] matrix[nfiles, theta_nc] theta_mm; // model matrix
int<lower=0> psi_nc;
array[nspec] matrix[nsites, psi_nc] psi_mm; // occ model matrix
}
// The parameters accepted by the model. Our model
// accepts two parameters 'mu' and 'sigma'.
parameters {
// mu
real mu_int[2];
real sigma_int[2];
// re for mu
array[2, nspec] real mu_raw;
real<lower=0> mu_sd[2];
// re for sigma
array[2, nspec] real sigma_raw;
real<lower=0> sigma_sd[2];
// real alpha[nspec];
// location random effect
array[nloc, nspec] real loc_raw;
real<lower=0> loc_sd[nspec];
// year random effect
array[nyear, nspec] real year_raw;
real<lower=0> year_sd[nspec];
// site random effect
array[nsites, nspec] real site_raw;
real<lower=0> site_sd[nspec];
// files random effect
// array[nfiles, nspec] real file_raw;
// real<lower=0> file_sd[nspec];
// call rate coefficient
array[nspec] vector[theta_nc] beta_theta; //call rate
// occ coef
array[nspec] vector[psi_nc] beta_psi; //call rate
}
transformed parameters {
array[nsites, nspec] real<lower=0,upper=1> psi;
array[nspec] vector<lower=0,upper=1>[nfiles] theta;
real<lower=0> sigma[2, nspec];
real sigma_var[2, nspec];
// random effects
array[nloc, nspec] real eps_loc;
array[nyear, nspec] real eps_year;
array[nsites, nspec] real eps_site;
array[2, nspec] real mu_mean;
array[2, nspec] real<lower=0> mu;
for(j in 1:nspec) {
// mu and sigma beta param
mu_mean[1,j] = mu_int[1] + mu_sd[1] * mu_raw[1,j];
mu_mean[2,j] = mu_int[2] + mu_sd[2] * mu_raw[2,j];
// linear model for sigma
sigma_var[1,j] = sigma_int[1] + sigma_sd[1] * sigma_raw[1,j];
sigma_var[2,j] = sigma_int[2] + sigma_sd[2] * sigma_raw[2,j];
// beta parameterization
mu[1,j] = inv_logit(mu_mean[1,j]) .* exp(sigma_var[1,j]);
mu[2,j] = inv_logit(mu_mean[2,j]) .* exp(sigma_var[2,j]);
sigma[1,j] = ((1-inv_logit(mu_mean[1,j])) .* exp(sigma_var[1,j]));
sigma[2,j] = ((1-inv_logit(mu_mean[2,j])) .* exp(sigma_var[2,j]));
// locations
for(l in 1:nloc) {
eps_loc[l,j] = loc_sd[j] * loc_raw[l,j];
}
// years
for(y in 1:nyear) {
eps_year[y,j] = year_sd[j] * year_raw[y,j];
}
for(i in 1:nsites) {
eps_site[i,j] = site_sd[j] * site_raw[i,j];
psi[i,j] = inv_logit(psi_mm[j, i] * beta_psi[j] + eps_loc[loc_code[i,j], j] + eps_year[year_code[i,j], j]); // + eps_site[i,j]);
}
for(k in 1:nfiles) {
theta[j,k] = inv_logit(theta_mm[j,k] * beta_theta[j] + eps_site[site_code[k,j],j]); // + (file_sd[j] * to_vector(file_raw[,j])));
}
}
}
model {
// priors
for(j in 1:nspec) {
// alpha[j] ~ normal(0,2);
// re priors
loc_raw[,j] ~ normal(0,1);
loc_sd[j] ~ normal(0,1);
year_raw[,j] ~ normal(0,1);
year_sd[j] ~ normal(0,1);
site_raw[,j] ~ normal(0,1);
site_sd[j] ~ normal(0,1);
// file_raw[,j] ~ normal(0,1);
// file_sd[j] ~ normal(0,1);
beta_theta[j] ~ normal(0,2);
beta_psi[j] ~ normal(0,2);
}
// mu priors
mu_int[1] ~ normal(0,10);
mu_int[2] ~ normal(0,10);
mu_raw[1,] ~ normal(0,1);
mu_raw[2,] ~ normal(0,1);
mu_sd ~ normal(0,1);
//sigma priors
sigma_int[1] ~ normal(0,3);
sigma_int[2] ~ normal(0,3);
sigma_raw[1,] ~ normal(0,1);
sigma_raw[2,] ~ normal(0,1);
sigma_sd ~ normal(0,1);
for(j in 1:nspec) {
for(i in 1:nsites) {
if(start_idx_0[i,j] != 0) {
if(any_seen[i,j] == 0) {
target += log1m(psi[i,j]) + beta_lpdf(score[start_idx_0[i,j]:end_idx_0[i,j],j] | mu[1, j], sigma[1,j]);
}
target += log(psi[i,j]) + log1m(theta[j, start_idx_0[i,j]:end_idx_0[i,j]])
+ beta_lpdf(score[start_idx_0[i,j]:end_idx_0[i,j],j] | mu[1, j], sigma[1,j]);
}
if(start_idx_1[i,j] != 0) {
target += log(psi[i,j]) + log(theta[j, start_idx_1[i,j]:end_idx_1[i,j]])
+ beta_lpdf(score[start_idx_1[i,j]:end_idx_1[i,j],j] | mu[2, j], sigma[2,j]);
}
if(start_idx_2[i,j] != 0) {
if(any_seen[i,j] == 0) {
target += log1m(psi[i,j])
+ beta_lpdf(score[start_idx_2[i,j]:end_idx_2[i,j],j] | mu[1, j], sigma[1,j]);
}
target += log_sum_exp(log(psi[i,j])
+ log_sum_exp(log1m(theta[j, start_idx_2[i,j]:end_idx_2[i,j]]))
+ beta_lpdf(score[start_idx_2[i,j]:end_idx_2[i,j],j] | mu[1, j], sigma[1,j]),
log(psi[i,j])
+ log_sum_exp(log(theta[j, start_idx_2[i,j]:end_idx_2[i,j]]))
+ beta_lpdf(score[start_idx_2[i,j]:end_idx_2[i,j],j] | mu[2, j], sigma[2,j]));
}
}
}
}
generated quantities {
int<lower=0,upper=1> zm[nsites, nspec];
int<lower=0,upper=1> z[nsites, nspec];
int<lower=0> z_sum[nyear, nspec];
array[nyear, nspec] real z_hat;
int<lower=0> theta_sum[nyear, nspec];
array[nyear, nspec] real theta_hat;
array[nsites, nspec] real theta_site;
array[nsites, nspec] real rel_ab;
array[nyear, nspec] real rel_ab_sum;
array[nyear, nspec] real rel_ab_mean;
real mu_pred[2,nspec];
// real theta_phen [365,nspec];
array[nspec] vector<lower=0,upper=1>[nfiles] det_pred;
array[nfiles, nspec] int<lower=0,upper=1> det_z;
array[nfiles, nspec] real score_pred;
array[nsites] int<lower=0> species_richness;
for(j in 1:nspec) {
zm[,j] = bernoulli_rng(psi[,j]);
mu_pred[, j] = beta_rng(mu[,j], sigma[,j]);
// for(i in 1:365) {
// // last two predictors must be cos and sin days
// theta_phen[i,j] = inv_logit(beta_theta[j,1] + beta_theta[j,theta_nc-1]*cos(2*pi()*i/365) + beta_theta[j,theta_nc]*sin(2*pi()*i/365));
// }
for(y in 1:nyear) z_sum[y,j] = 0;
for(y in 1:nyear) theta_sum[y,j] = 0;
for(y in 1:nyear) rel_ab_sum[y,j] = 0;
for(n in 1:nsites) {
det_pred[j, site_idx_start[n,j]:site_idx_end[n,j]] = psi[n,j] * theta[j,site_idx_start[n,j]:site_idx_end[n,j]];
det_z[site_idx_start[n,j]:site_idx_end[n,j],j] = bernoulli_rng(det_pred[j,site_idx_start[n,j]:site_idx_end[n,j]]);
}
for(k in 1:nfiles) {
if(detected[k,j] != 2) {
det_z[k,j] = detected[k,j];
}
if(det_z[k,j] == 0) {
score_pred[k,j] = beta_rng(mu[1, j], sigma[1, j]);
} else if(det_z[k,j] == 1) {
score_pred[k,j] = beta_rng(mu[2, j], sigma[2, j]);
}
}
for(n in 1:nsites) {
theta_site[n,j] = mean(det_z[site_idx_start[n,j]:site_idx_end[n,j],j]);
theta_sum[year_code[n,j],j] += sum(det_z[site_idx_start[n,j]:site_idx_end[n,j],j]);
z[n,j] = max(zm[n,j], any_seen[n,j]);
rel_ab[n,j] = z[n,j]*theta_site[n,j];
rel_ab_sum[year_code[n,j],j] += rel_ab[n,j];
z_sum[year_code[n,j],j] += z[n,j];
}
}
for(j in 1:nspec) {
for(y in 1:nyear) {
z_hat[y,j] = z_sum[y,j]/year_counts[y,j];
rel_ab_mean[y,j] = rel_ab_sum[y,j]/year_counts[y,j];
theta_hat[y,j] = theta_sum[y,j]/year_counts_file[y];
}
}
// site-level community score
for(n in 1:nsites) {
species_richness[n] = sum(z[n,]);
}
}# cont_model_ms_norm <- cmdstan_model("stan/continuous_model_ms_my_norm.stan")
cont_model_ms_beta <- cmdstan_model("stan/continuous_model_ms_my_beta.stan")
ni <- 150 #samples
nw <- 150 #warmups
nc <- 8 #chains
fit_beta <- cont_model_ms_beta$sample(data = stan_data_beta,
chains = nc,
parallel_chains = nc,
show_messages = TRUE,
save_warmup = FALSE,
iter_sampling = ni,
iter_warmup = nw, refresh = 50)
fit_beta$save_object("outputs/fit_beta.rds")
fit_beta <- readRDS("outputs/fit_beta.rds")
Posterior predictive checks can be used to compare model predictions to the original data supplied to the model. Here we compare summary statistics for the probability scores supplied to the model. Ideally, predictions from the model are able to align with the supplied data. We compare 25%, 50% and 75% confidence intervals for the ‘probability scores’.
q95 <- function(x) quantile(x, 0.95, na.rm = T)
q25 <- function(x) quantile(x, 0.25, na.rm = T)
q50 <- function(x) quantile(x, 0.5, na.rm = T)
q75 <- function(x) quantile(x, 0.75, na.rm = T)
sd_90 <- function (x, na.rm = FALSE) {
quants <- quantile(x, c(0.05, 0.95), na.rm = T)
x <- x[x < quants[2] & x > quants[1]]
sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x),
na.rm = na.rm))
}
species_list_cleaned2 <- species_list_cleaned
names(species_list_cleaned2)[6] <- "Spotted Marsh Frog"
ppc_plots_25 <- list()
ppc_plots_50 <- list()
ppc_plots_75 <- list()
for(i in 1:length(species_list_cleaned)) {
ppc_plots_25[[i]] <- posterior_checks_multispecies(model = fit_beta,
model_data = joined_data,
species_index = i,
stat = "q25",
title = paste0("25% quantile ", names(species_list_cleaned2)[i]))
ppc_plots_50[[i]] <- posterior_checks_multispecies(model = fit_beta,
model_data = joined_data,
species_index = i,
stat = "q50",
title = paste0("50% quantile ", names(species_list_cleaned2)[i]))
ppc_plots_75[[i]] <- posterior_checks_multispecies(model = fit_beta,
model_data = joined_data,
species_index = i,
stat = "q75",
title = paste0("75% quantile ", names(species_list_cleaned2)[i]))
}
ppc_grid <- cowplot::plot_grid(plotlist = c(ppc_plots_25, ppc_plots_50, ppc_plots_75), nrow = 6, byrow = F)
ggsave(plot = ppc_grid, filename = "outputs/plots/ppcs_beta.pdf", width = 6000, height = 8000, units = "px")
ppc_grid
Figure 2: Posterior predictive checks for the 25%, 50%, and 75% quantiles of backpredicted classifier probability scores against the observed quantiles from validated data
Using our model we can visualise how our statistical model is able to delineate validated calls into detections an non-detections. Our model determines the performance of the acoustic classifier by the estimation of two hierarchical parameters for each species. These parameters estimate the ‘classifier probability score’ (\(\mu\)) when there is no call present in the snippet (\(\mu_{det\ =\ 0}\)) and when there is a call present in the snippet (\(\mu_{det\ =\ 1}\)). At the very least, the classifier should have a higher probability score for detections than non-detections (\(\mu_{det\ =\ 1}\) > \(\mu_{det\ =\ 0}\)).
mu_summary <- fit_beta$draws("mu_pred", format = "df")
mu_summary_long <- pivot_longer(mu_summary, cols = 1:12)
mu_summary_long$`Validated detection` <- rep(c("Non-detection", "Detection"), length.out = nrow(mu_summary_long))
mu_summary_long$Species <- rep(names(species_list_cleaned), each = 2, length.out = nrow(mu_summary_long))
ggplot(data = mu_summary_long) +
stat_density_ridges(aes(x = value,
y = `Validated detection`,
fill = 0.5 - abs(0.5 - stat(ecdf))),
quantile_lines = TRUE, quantiles = 2, geom = "density_ridges_gradient", calc_ecdf = TRUE) +
scale_fill_gradient(name = "Posterior density", low = delwp_cols[3], high = delwp_cols[1]) +
scale_x_continuous(expand = c(0, 0), limits = c(0,1), name = "Classifier probability score") +
scale_y_discrete(expand = c(0, 0)) +
coord_cartesian(clip = "off") +
facet_wrap(~Species) +
delwp_theme() +
theme(panel.spacing = unit(2, "lines"))
Figure 3: Fitted beta-distributions of acoustic classifier performance for scores when a snippet had a detection vs non-detection
Based on these distributions we see that the classifier is more often than not able to delineate true calls from unlikely calls. However. we also see that these distributions overlap. This means there is a relatively large probaility space where a ‘classifier probability score’ could reasonable both be a non-detection or a detection.
We investigated whether several hydrological covariates impacted site occupancy for each species. These variables can vary across time (over survey years) and across space (over different sites).
# beta_summaries
beta_summaries <- fit_beta$summary("beta_psi")
beta_table_summary <- beta_summaries %>%
mutate(species = factor(rep(names(species_list_cleaned), times = 4), levels = names(species_list_cleaned)),
covariate = rep(c("(Intercept)",
"Wetness",
"Photosynthetic Vegetation",
"Water"), each = length(species_list_cleaned))) %>%
arrange(species) %>%
select(species, covariate, mean, sd, q5, q95)
format_cov <- function(sp, b, data = beta_summaries, variable = "beta_psi") {
fd <- data %>%
filter(variable == !!paste0(variable, "[", sp, ",",b+1,"]"))
paste0("$\\beta$ = ",
round(fd$mean, 2),
" [90% CI: ",
round(fd$q5, 2),
" – ",
round(fd$q95, 2),
"]")
}
ab_joined_list <- list()
for(j in 1:length(species_list_cleaned)) {
beta_draws <- fit_beta$draws("beta_psi", format = "df")
which_sp <- which(stringr::str_detect(string = colnames(beta_draws),
pattern = paste0("beta_psi\\[", j)))
beta_draws <- beta_draws[,which_sp]
ab_marginal_effects <- list()
ab_plot <- list()
ab_to_use <- psi_form
phi_vars <- unlist(stringr::str_remove_all(string = labels(terms(psi_form)), pattern = "log|scale|sqrt|\\)|\\("))
phi_logs <- unlist(stringr::str_detect(string = labels(terms(psi_form)), pattern = "log\\("))
phi_sqrts <- c(0.5,1,0.5)
phi_labs <- c("Wetness",
"Photosynthetic Vegetation",
"Water")
fac <- c(FALSE, FALSE, FALSE)
for(i in 1:length(phi_vars)) {
ab_marginal_effects[[i]] <- marginal_effects_cmd(beta_draws,
param = "beta_psi", species_index = j,
param_number = i+1, log = phi_logs[i],
model_data = site_data[[j]],
abundance = FALSE,
pwr = phi_sqrts[i],
model_column = phi_vars[i],
transition = FALSE) %>%
mutate(species = names(species_list_cleaned)[j])
}
ab_joined_list[[j]] <- bind_rows(ab_marginal_effects)
}
all_me_data <- bind_rows(ab_joined_list) %>%
mutate(param = case_when(param == "wet" ~ "Wetness",
param == "pv" ~ "Photosynthetic Vegetation",
param == "water" ~ "Water"),
species = case_when(species == "Spotted Marsh Frog (race unknown)" ~ "Spotted Marsh Frog", TRUE ~ species))
cov_plot <- marginal_effects_plot_cmd_all(all_me_data %>%
mutate(param = factor(param, levels = phi_labs)),
col = "DarkGreen",
factor = FALSE,
ylab = "Contribution to occupancy") +
ggplot2::facet_grid(species~param, scales = "free") +
delwp_theme() +
theme(strip.text = ggtext::element_markdown()) +
xlab("Covariate value")
ggsave(plot = cov_plot, filename = "outputs/plots/occupancy_covariate_effects.png", width = 2000, height = 3000, units = "px")
cov_plot
Figure 4: Marginal response curves for the various fixed-effect occupancy parameters used in the model.
We also model how various covariates impact nightly/daily calling activity. Here we look at nightly precipitation, minimum temperature, current hydrological ‘wetness’, ‘water’ and ‘photosynthetic vegetation’ cover.
# beta_summaries
theta_summaries <- fit_beta$summary("beta_theta")
theta_table_summary <- theta_summaries %>%
mutate(species = factor(rep(names(species_list_cleaned), times = 6), levels = names(species_list_cleaned)),
covariate = rep(c("(Intercept)",
"Precipitation",
"Minimum Temperature",
"Wetness",
"Water",
"Photosynthetic Vegetation"), each = length(species_list_cleaned))) %>%
arrange(species) %>%
select(species, covariate, mean, sd, q5, q95)
ab_joined_list <- list()
for(j in 1:length(species_list_cleaned)) {
theta_draws <- fit_beta$draws("beta_theta", format = "df")
which_sp <- which(stringr::str_detect(string = colnames(theta_draws),
pattern = paste0("beta_theta\\[", j)))
beta_draws <- theta_draws[,which_sp]
ab_marginal_effects <- list()
ab_plot <- list()
ab_to_use <- theta_form
phi_vars <- unlist(stringr::str_remove_all(string = labels(terms(ab_to_use)), pattern = "log|scale|sqrt|\\)|\\("))
phi_logs <- unlist(stringr::str_detect(string = labels(terms(ab_to_use)), pattern = "log\\("))
phi_sqrts <- c(1,1,1,1,1)
phi_labs <- c("Precipitation",
"Minimum Temperature",
"Wetness",
"Water",
"Photosynthetic Vegetation")
fac <- c(FALSE, FALSE, FALSE, FALSE, FALSE)
for(i in 1:length(phi_vars)) {
ab_marginal_effects[[i]] <- marginal_effects_cmd(theta_draws,
param = "beta_theta", species_index = j,
param_number = i+1, log = phi_logs[i],
model_data = joined_data[[j]],
abundance = FALSE,
pwr = phi_sqrts[i],
model_column = phi_vars[i],
transition = FALSE) %>%
mutate(species = names(species_list_cleaned)[j])
}
ab_joined_list[[j]] <- bind_rows(ab_marginal_effects)
}
all_theta_data <- bind_rows(ab_joined_list) %>%
mutate(param = case_when(param == "wet" ~ "Wetness",
param == "pv" ~ "Photosynthetic Vegetation",
param == "water" ~ "Water",
param == "tmin" ~ "Minimum Temperature",
TRUE ~ param),
species = case_when(species == "Spotted Marsh Frog (race unknown)" ~ "Spotted Marsh Frog", TRUE ~ species))
theta_plot <- marginal_effects_plot_cmd_all(all_theta_data %>%
mutate(param = factor(param, levels = phi_labs)),
col = "DarkGreen",
factor = FALSE,
ylab = "Contribution to call rate") +
ggplot2::facet_grid(species~param, scales = "free") +
delwp_theme() +
theme(strip.text = ggtext::element_markdown()) +
xlab("Covariate value")
ggsave(plot = theta_plot, filename = "outputs/plots/call_rate_covariate_effects.png", width = 2800, height = 3000, units = "px")
theta_plot
Figure 5: Marginal response curves for the various fixed-effect call-rate parameters used in the model.
Using our model, we estimate how occupancy rates vary for each species, and whether there is variation in these over the years.
zhat <- fit_beta$summary("z_hat")
zhat$year <- rep(sort(unique(site_metadata_sf$`Monitoring year`)), times = length(species_list_cleaned))
zhat$species <- rep(names(species_list_cleaned), each = length(unique(site_metadata_sf$`Monitoring year`)))
zhat_draws <- fit_beta$draws("z_hat", format = "matrix") %>%
as.data.frame() %>%
pivot_longer(cols = everything()) %>%
mutate(year = rep(sort(unique(site_metadata_sf$`Monitoring year`)), length.out = nrow(.)),
species = rep(names(species_list_cleaned),
each = length(unique(site_metadata_sf$`Monitoring year`)),
length.out = nrow(.)))
dfas <- list()
for(i in 1:nspecies_cleaned) {
dfas[[i]] <- data.frame(any_seen = stan_data$any_seen[,i],
yearf = stan_data$year_code[,i]) %>%
group_by(yearf) %>%
summarise(det = sum(any_seen),
tot = n(),
rate = det/tot) %>%
ungroup() %>%
mutate(year = sort(unique(site_metadata_sf$`Monitoring year`)),
species = names(species_list_cleaned)[i])
}
dfas_comb <- bind_rows(dfas) %>%
mutate(Validated = "Proportion of sites with validated occupancy")
zhat_draws %>%
ggplot() +
geom_violin(aes(x = year, y = value), draw_quantiles = c(0.5), adjust = 2, fill = "grey95") +
geom_point(aes(x = year, y = rate, fill = Validated), data = dfas_comb, shape = 21, size = 3) +
facet_wrap(~species, nrow = 3) +
ylab("Proportion of songmeter sites occupied") +
xlab("Survey year") +
scale_fill_manual(values = unname(delwp_cols[3])) +
delwp_theme() +
theme(legend.position = "top", legend.title = element_text(size = 0), legend.text = element_text(size = 12))
Figure 6: Estimated occupancy rates accross all songmeter sites per year for each species
We can also estimate the average detected calling activities over the years. This estimate is the average across all sites surveyed that year and represents the average nightly detected call rate. Note that this ‘call rate’ may be a mixture of the acoustic devices ability to detect the call, the calling activity as well as the abundance of frogs at that site. Note that these estimates are conditional upon the site being occupied. Thus an unoccupied site would not have corresponding call rate estimated.
theta_hat <- fit_beta$summary("theta_hat")
theta_hat$year <- zhat$year
theta_hat$species <- zhat$species
theta_hat_draws <- fit_beta$draws("theta_hat", format = "matrix") %>%
as.data.frame() %>%
pivot_longer(cols = everything()) %>%
mutate(year = rep(sort(unique(site_metadata_sf$`Monitoring year`)), length.out = nrow(.)),
species = rep(names(species_list_cleaned),
each = length(unique(site_metadata_sf$`Monitoring year`)),
length.out = nrow(.)))
theta_hat_draws %>%
ggplot() +
geom_violin(aes(x = year, y = value), adjust = 2, draw_quantiles = 0.5, fill = "grey95") +
facet_wrap(~species, nrow = 3) +
scale_y_continuous(name = "Average calling activity (nightly call probability)", limits = c(0,0.52)) +
xlab("Survey year") +
delwp_theme()
Figure 7: Estimated call rates accross all songmeter sites per year for each species, conditional upon the site being occupied
Given that call rate is dependent upon the site being occupied, the estimated call rate may be better interpreted alongside occupancy in order to measure the relative frog activity or abundance at the site. A score of 1 means that every site is predicted to be occupied and the species is calling every night. This metric might be beneficial in comparing relative activity and abundance between species and across years.
rel_ab_draws <- fit_beta$draws("rel_ab_mean", format = "matrix") %>%
as.data.frame() %>%
pivot_longer(cols = everything()) %>%
mutate(year = rep(sort(unique(site_metadata_sf$`Monitoring year`)), length.out = nrow(.)),
species = rep(names(species_list_cleaned),
each = length(unique(site_metadata_sf$`Monitoring year`)),
length.out = nrow(.)))
rel_ab_draws %>%
ggplot() +
geom_violin(aes(x = year, y = value), adjust = 2, draw_quantiles = 0.5, fill = "grey95") +
facet_wrap(~species, nrow = 3) +
scale_y_continuous(name = "Unconditional calling activity", limits = c(0,0.52)) +
xlab("Survey year") +
delwp_theme()
Figure 8: Estimated activity accross all songmeter sites per year for each species, unconditional on occupancy (a mixture of both occupancy and calling activity)
We can calculate species-richness at each site by estimating how many (out of the 6 species modelled) are estimated to occupy each site.
sp_r <- fit_beta$summary("species_richness")
sp_r_draws <- t(fit_beta$draws("species_richness", format = "matrix")) %>% as.data.frame()
sp_r_draws$year <- year_code[[1]]
sp_r_draws$Site_f <- site_data[[1]]$Site_f
sp_r_draws_long <- pivot_longer(sp_r_draws, cols = !all_of(c("year", "Site_f"))) %>%
group_by(name, year) %>%
summarise(mean = mean(value)) %>%
ungroup()
sp_r$year <- year_code[[1]]
sp_r$FloodScore <- site_data[[1]]$FloodScore
cols_sep <- unlist(lapply(delwp_cols_seq, function(x) x[c(6,10)]))
sp_r %>%
ggplot() +
ggbeeswarm::geom_quasirandom(aes(y = mean, x = year), data = sp_r_draws_long, shape = 21, size = 1, fill = "grey70", alpha = 0.25, stroke = 0.1) +
ggbeeswarm::geom_quasirandom(aes(y = mean, x = year, fill = FloodScore), shape = 21, size = 2) +
scale_fill_manual(values = unname(delwp_cols)) +
labs(x = "Year", y = "Site-level species richness", fill = "Flood score") +
scale_x_continuous(breaks = 1:6, labels = paste(2018:2023, 2019:2024, sep = " - ")) +
delwp_theme()
Figure 9: Estimated species-richness across sites and over the years. Species richness is grouped according to measured flood scores at that site, in that particular year. The grey points in the background represent the distribution of posterior samples for the average species-richness across all sites from year-to-year